suppressPackageStartupMessages({
library(data.table)
library(DESeq2)
library(gplots)
library(here)
library(hyperSpec)
library(RColorBrewer)
library(tidyverse)
library(VennDiagram)
})
suppressMessages({
source(here("UPSCb-common/src/R/featureSelection.R"))
source(here("UPSCb-common/src/R/volcanoPlot.R"))
source(here("UPSCb-common/src/R/gopher.R"))
})
pal=brewer.pal(8,"Dark2")
hpal <- colorRampPalette(c("blue","white","red"))(100)
mar <- par("mar")
Load the genes of interests (goi)
gois <- list(
myb.goi=scan(here("doc/MYB-goi.txt"),what="character"),
mybr.goi=scan(here("doc/MYB-related-goi.txt"),what="character"),
wrky.goi=scan(here("doc/WRKY-goi.txt"),what="character"),
auxin.goi=scan(here("doc/auxin-related-goi.txt"),what="character"),
pectin.goi=scan(here("doc/pectin-related-goi.txt"),what="character"))
"line_plot" <- function(dds=dds,vst=vst,gene_id){
sel <- grepl(gene_id,rownames(vst))
stopifnot(sum(sel)==1)
p <- ggplot(bind_cols(as.data.frame(colData(dds)),
data.frame(value=vst[sel,])),
aes(x=Time,y=value,col=Experiment,group=Experiment)) +
geom_point() + geom_smooth() +
scale_y_continuous(name="VST expression") +
ggtitle(label=paste("Expression for: ",gene_id))
suppressMessages(suppressWarnings(plot(p)))
return(NULL)
}
"extract_results" <- function(dds,vst,contrast,
padj=0.01,lfc=0.5,
plot=TRUE,verbose=TRUE,
export=TRUE,default_dir=here("data/analysis/DE"),
default_prefix="DE-",
labels=colnames(dds),
sample_sel=1:ncol(dds)){
if(length(contrast)==1){
res <- results(dds,name=contrast)
} else {
res <- results(dds,contrast=contrast)
}
if(plot){
par(mar=c(5,5,5,5))
volcanoPlot(res)
par(mar=mar)
}
sel <- res$padj <= padj & abs(res$log2FoldChange) >= lfc & ! is.na(res$padj)
if(verbose){
message(sprintf("There are %s genes that are DE",sum(sel)))
}
if(export){
if(!dir.exists(default_dir)){
dir.create(default_dir,showWarnings=FALSE,recursive=TRUE,mode="0771")
}
write.csv(res,file=file.path(default_dir,paste0(default_prefix,"results.csv")))
write.csv(res[sel,],file.path(default_dir,paste0(default_prefix,"genes.csv")))
}
if(plot){
heatmap.2(t(scale(t(vst[sel,sample_sel]))),
distfun = pearson.dist,
hclustfun = function(X){hclust(X,method="ward.D2")},
trace="none",col=hpal,labRow = FALSE,
labCol=labels[sample_sel]
)
}
return(list(all=rownames(res[sel,]),
up=rownames(res[sel & res$log2FoldChange > 0,]),
dn=rownames(res[sel & res$log2FoldChange < 0,])))
}
load(here("data/analysis/salmon/Lacbi-all-dds.rda"))
vsd <- varianceStabilizingTransformation(dds,blind=FALSE)
vst <- assay(vsd)
vst <- vst - min(vst)
rownames(vst) <- substr(rownames(vst),12,17)
lapply(gois, function(goi){
pdf(here("data/analysis/DE",paste0(goi,"-Lacbi-candidates-line-plot.pdf")),width=12,height=8)
par(mfrow=c(2,3))
lapply(goi,function(x){
if ((x %in% rownames(vst))){
line_plot(dds,vst,x)
}
})
dev.off()
})
## $myb.goi
## png
## 2
##
## $mybr.goi
## png
## 2
##
## $wrky.goi
## png
## 2
##
## $auxin.goi
## png
## 2
##
## $pectin.goi
## png
## 2
dds <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
plotDispEsts(dds)
The model used is:
Experiment * Time meaning that the Experiment and Time variable as well as their interaction Experiment:Time is considered. Because we cannot assume that these two variables explain all the variance in the data, there is also an Intercept for the linear model. This also implies that the model assumes ECM at 3 hours to be the baseline; i.e. everything is compared against it.
resultsNames(dds)
## [1] "Intercept" "Experiment_FLM_vs_ECM" "Time_7_vs_3"
## [4] "Time_14_vs_3" "Time_21_vs_3" "Time_28_vs_3"
## [7] "ExperimentFLM.Time7" "ExperimentFLM.Time14" "ExperimentFLM.Time21"
## [10] "ExperimentFLM.Time28"
In the following we look at the interaction specific genes; i.e. genes that changes at a given time transition in between experiments ### FLM vs. ECM at T3
Lb_3 <- extract_results(dds,vst,"Experiment_FLM_vs_ECM",
default_prefix="Lacbi_FLM-vs-ECM_T3_",
labels=paste0(colData(dds)$Experiment,
colData(dds)$Time),
sample_sel=colData(dds)$Time==3)
## Loading required package: LSD
## There are 2100 genes that are DE
Here we want to conmbine the effect of FLM-ECM at time T3 and the specific FLM:T7 interaction
Lb_7 <- extract_results(dds,vst,c(0,1,0,0,0,0,1,0,0,0),
default_prefix="Lacbi_FLM-vs-ECM_T7_",
labels=paste0(colData(dds)$Experiment,
colData(dds)$Time),
sample_sel=colData(dds)$Time==7)
## There are 1222 genes that are DE
Lb_14 <- extract_results(dds,vst,c(0,1,0,0,0,0,0,1,0,0),
default_prefix="Lacbi_FLM-vs-ECM_T14_",
labels=paste0(colData(dds)$Experiment,
colData(dds)$Time),
sample_sel=colData(dds)$Time==14)
## There are 362 genes that are DE
Lb_21 <- extract_results(dds,vst,c(0,1,0,0,0,0,0,0,1,0),
default_prefix="Lacbi_FLM-vs-ECM_T21_",
labels=paste0(colData(dds)$Experiment,
colData(dds)$Time),
sample_sel=colData(dds)$Time==21)
## There are 381 genes that are DE
Lb_28 <- extract_results(dds,vst,c(0,1,0,0,0,0,0,0,0,1),
default_prefix="Lacbi_FLM-vs-ECM_T28_",
labels=paste0(colData(dds)$Experiment,
colData(dds)$Time),
sample_sel=colData(dds)$Time==28)
## There are 673 genes that are DE
grid.newpage()
grid.draw(venn.diagram(list(T3=Lb_3$all,
T7=Lb_7$all,
T14=Lb_14$all,
T21=Lb_21$all,
T28=Lb_28$all),
NULL,
fill=pal[1:5]))
grid.newpage()
grid.draw(venn.diagram(list(T3=Lb_3$up,
T7=Lb_7$up,
T14=Lb_14$up,
T21=Lb_21$up,
T28=Lb_28$up),
NULL,
fill=pal[1:5]))
grid.newpage()
grid.draw(venn.diagram(list(T3=Lb_3$dn,
T7=Lb_7$dn,
T14=Lb_14$dn,
T21=Lb_21$dn,
T28=Lb_28$dn),
NULL,
fill=pal[1:5]))
res.list <- list(Lb_3=list(all=substr(Lb_3$all,12,17),
up=substr(Lb_3$up,12,17),
dn=substr(Lb_3$dn,12,17)),
Lb_7=list(all=substr(Lb_7$all,12,17),
up=substr(Lb_7$up,12,17),
dn=substr(Lb_7$dn,12,17)),
Lb_14=list(all=substr(Lb_14$all,12,17),
up=substr(Lb_14$up,12,17),
dn=substr(Lb_14$dn,12,17)),
Lb_21=list(all=substr(Lb_21$all,12,17),
up=substr(Lb_21$up,12,17),
dn=substr(Lb_21$dn,12,17)),
Lb_28=list(all=substr(Lb_28$all,12,17),
up=substr(Lb_28$up,12,17),
dn=substr(Lb_28$dn,12,17)))
background <- rownames(vst)[featureSelect(vst,dds$Experiment,exp=0.1)]
enr.list <- lapply(res.list,function(r){
lapply(r,gopher,background=background,task="go",url="lacbi2")
})
## Loading required package: jsonlite
##
## Attaching package: 'jsonlite'
## The following object is masked from 'package:purrr':
##
## flatten
## No enrichments found in task: go
## No enrichments found in task: go
## No enrichments found in task: go
## No enrichments found in task: go
dev.null <- lapply(names(enr.list),function(n){
r <- enr.list[[n]]
if (! is.null(r$all$go)){
write_delim(r$all$go,path=file.path(file.path(here("data/analysis/DE",
paste0(n,"-all-DE-genes_GO-enrichment.txt")))))
write_delim(r$all$go[,c("id","padj")],path=file.path(file.path(here("data/analysis/DE",
paste0(n,"-all-DE-genes_GO-enrichment_for-REVIGO.txt")))))
}
if (! is.null(r$up$go)){
write_csv(r$up$go,path=file.path(file.path(here("data/analysis/DE",
paste0(n,"-up-DE-genes_GO-enrichment.txt")))))
write_delim(r$up$go[,c("id","padj")],path=file.path(file.path(here("data/analysis/DE",
paste0(n,"-up-DE-genes_GO-enrichment_for-REVIGO.txt")))))
}
if (! is.null(r$dn$go)){
write_csv(r$dn$go,path=file.path(file.path(here("data/analysis/DE",
paste0(n,"-down-DE-genes_GO-enrichment.txt")))))
write_delim(r$dn$go[,c("id","padj")],path=file.path(file.path(here("data/analysis/DE",
paste0(n,"-down-DE-genes_GO-enrichment_for-REVIGO.txt")))))
}
})
load(here("data/analysis/salmon/Potra-104-127-139-removed-dds.rda"))
vsd <- varianceStabilizingTransformation(dds,blind=FALSE)
vst <- assay(vsd)
vst <- vst - min(vst)
line_plot(dds,vst,"Potra000962g07909")
## NULL
lapply(gois, function(goi){
pdf(here("data/analysis/DE",paste0(goi,"-Potra-candidates-line-plot.pdf")),width=12,height=8)
par(mfrow=c(2,3))
lapply(goi,function(x){
if ((x %in% rownames(vst))){
line_plot(dds,vst,x)
}
})
dev.off()
})
## $myb.goi
## png
## 2
##
## $mybr.goi
## png
## 2
##
## $wrky.goi
## png
## 2
##
## $auxin.goi
## png
## 2
##
## $pectin.goi
## png
## 2
dds <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
plotDispEsts(dds)
The model used is:
Experiment * Time meaning that the Experiment and Time variable as well as their interaction Experiment:Time is considered. Because we cannot assume that these two variables explain all the variance in the data, there is also an Intercept for the linear model. This also implies that the model assumes Cont at 3 hours to be the baseline; i.e. everything is compared against it.
resultsNames(dds)
## [1] "Intercept" "Experiment_ECM_vs_Cont" "Time_7_vs_3"
## [4] "Time_14_vs_3" "Time_21_vs_3" "Time_28_vs_3"
## [7] "ExperimentECM.Time7" "ExperimentECM.Time14" "ExperimentECM.Time21"
## [10] "ExperimentECM.Time28"
In the following we look at the interaction specific genes; i.e. genes that changes at a given time transition in between experiments ### ECM vs. Cont at T3
Pa_3 <- extract_results(dds,vst,"Experiment_ECM_vs_Cont",
default_prefix="Potra_ECM-vs-Cont_T3_",
labels=paste0(colData(dds)$Experiment,
colData(dds)$Time),
sample_sel=colData(dds)$Time==3)
## There are 230 genes that are DE
Pa_7 <- extract_results(dds,vst,c(0,1,0,0,0,0,1,0,0,0),
default_prefix="Potra_ECM-vs-Cont_T7_",
labels=paste0(colData(dds)$Experiment,
colData(dds)$Time),
sample_sel=colData(dds)$Time==7)
## There are 205 genes that are DE
Pa_14 <- extract_results(dds,vst,c(0,1,0,0,0,0,0,1,0,0),
default_prefix="Potra_ECM-vs-Cont_T14_",
labels=paste0(colData(dds)$Experiment,
colData(dds)$Time),
sample_sel=colData(dds)$Time==14)
## There are 76 genes that are DE
Pa_21 <- extract_results(dds,vst,c(0,1,0,0,0,0,0,0,1,0),
default_prefix="Potra_ECM-vs-Cont_T21_",
labels=paste0(colData(dds)$Experiment,
colData(dds)$Time),
sample_sel=colData(dds)$Time==21)
## There are 1276 genes that are DE
Pa_28 <- extract_results(dds,vst,c(0,1,0,0,0,0,0,0,0,1),
default_prefix="Potra_ECM-vs-Cont_T28_",
labels=paste0(colData(dds)$Experiment,
colData(dds)$Time),
sample_sel=colData(dds)$Time==28)
## There are 416 genes that are DE
grid.newpage()
grid.draw(venn.diagram(list(T3=Pa_3$all,
T7=Pa_7$all,
T14=Pa_14$all,
T21=Pa_21$all,
T28=Pa_28$all),
NULL,
fill=pal[1:5]))
grid.newpage()
grid.draw(venn.diagram(list(T3=Pa_3$up,
T7=Pa_7$up,
T14=Pa_14$up,
T21=Pa_21$up,
T28=Pa_28$up),
NULL,
fill=pal[1:5]))
grid.newpage()
grid.draw(venn.diagram(list(T3=Pa_3$dn,
T7=Pa_7$dn,
T14=Pa_14$dn,
T21=Pa_21$dn,
T28=Pa_28$dn),
NULL,
fill=pal[1:5]))
res.list <- list(Pa_3=Pa_3,
Pa_7=Pa_7,
Pa_14=Pa_14,
Pa_21=Pa_21,
Pa_28=Pa_28)
background <- rownames(vst)[featureSelect(vst,dds$Experiment,exp=0.1)]
enr.list <- lapply(res.list,function(r){
lapply(r,gopher,background=background,task="go",url="potra")
})
## No enrichments found in task: go
## No enrichments found in task: go
## No enrichments found in task: go
dev.null <- lapply(names(enr.list),function(n){
r <- enr.list[[n]]
if (! is.null(r$all$go)){
write_delim(r$all$go,path=file.path(file.path(here("data/analysis/DE",
paste0(n,"-all-DE-genes_GO-enrichment.txt")))))
write_delim(r$all$go[,c("id","padj")],path=file.path(file.path(here("data/analysis/DE",
paste0(n,"-all-DE-genes_GO-enrichment_for-REVIGO.txt")))))
}
if (! is.null(r$up$go)){
write_csv(r$up$go,path=file.path(file.path(here("data/analysis/DE",
paste0(n,"-up-DE-genes_GO-enrichment.txt")))))
write_delim(r$up$go[,c("id","padj")],path=file.path(file.path(here("data/analysis/DE",
paste0(n,"-up-DE-genes_GO-enrichment_for-REVIGO.txt")))))
}
if (! is.null(r$dn$go)){
write_csv(r$dn$go,path=file.path(file.path(here("data/analysis/DE",
paste0(n,"-down-DE-genes_GO-enrichment.txt")))))
write_delim(r$dn$go[,c("id","padj")],path=file.path(file.path(here("data/analysis/DE",
paste0(n,"-down-DE-genes_GO-enrichment_for-REVIGO.txt")))))
}
})
load(here("data/analysis/salmon/Potri-all-dds.rda"))
vsd <- varianceStabilizingTransformation(dds,blind=FALSE)
vst <- assay(vsd)
vst <- vst - min(vst)
line_plot(dds,vst,"Potri.006G134800")
## NULL
line_plot(dds,vst,"Potri.006G221800")
## NULL
line_plot(dds,vst,"Potri.002G173900")
## NULL
line_plot(dds,vst,"Potri.004G088100")
## NULL
dds <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
plotDispEsts(dds)
The model used is:
Experiment * Time meaning that the Experiment and Time variable as well as their interaction Experiment:Time is considered. Because we cannot assume that these two variables explain all the variance in the data, there is also an Intercept for the linear model. This also implies that the model assumes Cont at 3 hours to be the baseline; i.e. everything is compared against it.
resultsNames(dds)
## [1] "Intercept" "Experiment_ECM_vs_Cont" "Time_7_vs_3"
## [4] "Time_14_vs_3" "Time_21_vs_3" "Time_28_vs_3"
## [7] "ExperimentECM.Time7" "ExperimentECM.Time14" "ExperimentECM.Time21"
## [10] "ExperimentECM.Time28"
In the following we look at the interaction specific genes; i.e. genes that changes at a given time transition in between experiments ### ECM vs. Cont at T3
Pa_3 <- extract_results(dds,vst,"Experiment_ECM_vs_Cont",
default_prefix="Potri_ECM-vs-Cont_T3_",
labels=paste0(colData(dds)$Experiment,
colData(dds)$Time),
sample_sel=colData(dds)$Time==3)
## There are 311 genes that are DE
Pa_7 <- extract_results(dds,vst,c(0,1,0,0,0,0,1,0,0,0),
default_prefix="Potri_ECM-vs-Cont_T7_",
labels=paste0(colData(dds)$Experiment,
colData(dds)$Time),
sample_sel=colData(dds)$Time==7)
## There are 219 genes that are DE
Pa_14 <- extract_results(dds,vst,c(0,1,0,0,0,0,0,1,0,0),
default_prefix="Potri_ECM-vs-Cont_T14_",
labels=paste0(colData(dds)$Experiment,
colData(dds)$Time),
sample_sel=colData(dds)$Time==14)
## There are 220 genes that are DE
Pa_21 <- extract_results(dds,vst,c(0,1,0,0,0,0,0,0,1,0),
default_prefix="Potri_ECM-vs-Cont_T21_",
labels=paste0(colData(dds)$Experiment,
colData(dds)$Time),
sample_sel=colData(dds)$Time==21)
## There are 1899 genes that are DE
Pa_28 <- extract_results(dds,vst,c(0,1,0,0,0,0,0,0,0,1),
default_prefix="Potri_ECM-vs-Cont_T28_",
labels=paste0(colData(dds)$Experiment,
colData(dds)$Time),
sample_sel=colData(dds)$Time==28)
## There are 355 genes that are DE
grid.newpage()
grid.draw(venn.diagram(list(T3=Pa_3$all,
T7=Pa_7$all,
T14=Pa_14$all,
T21=Pa_21$all,
T28=Pa_28$all),
NULL,
fill=pal[1:5]))
grid.newpage()
grid.draw(venn.diagram(list(T3=Pa_3$up,
T7=Pa_7$up,
T14=Pa_14$up,
T21=Pa_21$up,
T28=Pa_28$up),
NULL,
fill=pal[1:5]))
grid.newpage()
grid.draw(venn.diagram(list(T3=Pa_3$dn,
T7=Pa_7$dn,
T14=Pa_14$dn,
T21=Pa_21$dn,
T28=Pa_28$dn),
NULL,
fill=pal[1:5]))
## R version 3.6.2 (2019-12-12)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.4 LTS
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/libopenblasp-r0.2.20.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] grid parallel stats4 stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] jsonlite_1.6.1 LSD_4.0-0
## [3] limma_3.42.2 VennDiagram_1.6.20
## [5] futile.logger_1.4.3 forcats_0.4.0
## [7] stringr_1.4.0 dplyr_0.8.4
## [9] purrr_0.3.3 readr_1.3.1
## [11] tidyr_1.0.2 tibble_2.1.3
## [13] tidyverse_1.3.0 RColorBrewer_1.1-2
## [15] hyperSpec_0.99-20200213 xml2_1.2.2
## [17] ggplot2_3.2.1 lattice_0.20-40
## [19] here_0.1 gplots_3.0.3
## [21] DESeq2_1.26.0 SummarizedExperiment_1.16.1
## [23] DelayedArray_0.12.2 BiocParallel_1.20.1
## [25] matrixStats_0.55.0 Biobase_2.46.0
## [27] GenomicRanges_1.38.0 GenomeInfoDb_1.22.0
## [29] IRanges_2.20.2 S4Vectors_0.24.3
## [31] BiocGenerics_0.32.0 data.table_1.12.8
##
## loaded via a namespace (and not attached):
## [1] colorspace_1.4-1 rprojroot_1.3-2 htmlTable_1.13.3
## [4] XVector_0.26.0 fs_1.3.1 base64enc_0.1-3
## [7] rstudioapi_0.11 farver_2.0.3 bit64_0.9-7
## [10] fansi_0.4.1 AnnotationDbi_1.48.0 lubridate_1.7.4
## [13] splines_3.6.2 geneplotter_1.64.0 knitr_1.28
## [16] Formula_1.2-3 broom_0.5.4 annotate_1.64.0
## [19] cluster_2.1.0 dbplyr_1.4.2 png_0.1-7
## [22] compiler_3.6.2 httr_1.4.1 backports_1.1.5
## [25] assertthat_0.2.1 Matrix_1.2-18 lazyeval_0.2.2
## [28] cli_2.0.1 formatR_1.7 acepack_1.4.1
## [31] htmltools_0.4.0 tools_3.6.2 gtable_0.3.0
## [34] glue_1.3.1 GenomeInfoDbData_1.2.2 Rcpp_1.0.3
## [37] cellranger_1.1.0 vctrs_0.2.3 gdata_2.18.0
## [40] nlme_3.1-144 xfun_0.12 rvest_0.3.5
## [43] testthat_2.3.1 lifecycle_0.1.0 gtools_3.8.1
## [46] XML_3.99-0.3 zlibbioc_1.32.0 scales_1.1.0
## [49] hms_0.5.3 lambda.r_1.2.4 curl_4.3
## [52] yaml_2.2.1 memoise_1.1.0 gridExtra_2.3
## [55] rpart_4.1-15 latticeExtra_0.6-29 stringi_1.4.6
## [58] RSQLite_2.2.0 highr_0.8 genefilter_1.68.0
## [61] checkmate_2.0.0 caTools_1.18.0 rlang_0.4.4
## [64] pkgconfig_2.0.3 bitops_1.0-6 evaluate_0.14
## [67] labeling_0.3 htmlwidgets_1.5.1 bit_1.1-15.2
## [70] tidyselect_1.0.0 magrittr_1.5 R6_2.4.1
## [73] generics_0.0.2 Hmisc_4.3-1 DBI_1.1.0
## [76] pillar_1.4.3 haven_2.2.0 foreign_0.8-75
## [79] withr_2.1.2 survival_3.1-8 RCurl_1.98-1.1
## [82] nnet_7.3-13 modelr_0.1.6 crayon_1.3.4
## [85] futile.options_1.0.1 KernSmooth_2.23-16 rmarkdown_2.1
## [88] jpeg_0.1-8.1 locfit_1.5-9.1 readxl_1.3.1
## [91] blob_1.2.1 reprex_0.3.0 digest_0.6.25
## [94] xtable_1.8-4 munsell_0.5.0